##########################################################
### Sample Demographics                                ###
### Title: What Is a Patriot?                          ###
### Authors: Eddy S. F. Yeung, Mengqiao Wang, Kai Quek ###
### Version: February 13, 2024                         ###
##########################################################

### Set-up ----
## Clean the R environment: remove every object that ls() reports, so the
## script starts from an empty workspace.
## NOTE(review): the original comment also mentioned setting the working
## directory, but no setwd() call is visible in this script; the relative CSV
## paths used when importing the data resolve against the current working
## directory.
## NOTE(review): rm(list = ls()) does not detach loaded packages or reset
## options; running the script in a fresh R session is a more reliable way to
## get a clean slate.
rm(list = ls())

## Load the required packages
library(tidyverse) # version 2.0.0

## Import the datasets
## Both files are read with relative paths, so they must sit in the current
## working directory.
df_US <- read.csv("US_patriotism_2022.csv")
df_CN <- read.csv("CN_patriotism_2022.csv")

## Drop respondents whose survey completion time is less than 5 minutes
## (5 * 60 = 300 seconds).
## Columns are referenced by bare name inside filter() (data masking) rather
## than through `df_CN$...`, so the condition always refers to the rows that
## are being filtered.
## The CN duration is coerced with as.numeric(); any value that cannot be
## parsed becomes NA, and filter() drops rows whose condition is NA.
## NOTE(review): presumably the CN export stores this column as text — confirm.
df_US <- df_US %>% filter(Duration..in.seconds. >= 5 * 60)
df_CN <- df_CN %>% filter(as.numeric(Duration..in.seconds.) >= 5 * 60)

### Recode individual covariates for the US dataset ----
## Age (6 categories)
## `age` is computed as yob + 11.
## NOTE(review): presumably `yob` is a coded survey value rather than a
## calendar year — confirm against the codebook.
## `age6` bins: 1 = 18-24, 2 = 25-34, 3 = 35-44, 4 = 45-54, 5 = 55-64,
## 6 = 65 and above. Any age outside these ranges (or missing) becomes NA.
df_US <- mutate(
  df_US,
  age = yob + 11,
  age6 = case_when(
    between(age, 18, 24) ~ 1,
    between(age, 25, 34) ~ 2,
    between(age, 35, 44) ~ 3,
    between(age, 45, 54) ~ 4,
    between(age, 55, 64) ~ 5,
    age >= 65            ~ 6
  )
)

## Income (6 categories)
## Collapse the `income` codes into six brackets:
## 1-2 -> 1, 3-4 -> 2, 5-7 -> 3, 8-12 -> 4, 13-14 -> 5, 15-17 -> 6.
## Codes outside 1-17 (or missing) become NA.
df_US <- mutate(
  df_US,
  inc6 = case_when(
    between(income, 1, 2)   ~ 1,
    between(income, 3, 4)   ~ 2,
    between(income, 5, 7)   ~ 3,
    between(income, 8, 12)  ~ 4,
    between(income, 13, 14) ~ 5,
    between(income, 15, 17) ~ 6
  )
)

### Recode individual covariates for the CN dataset ----
## Age (6 categories; the cut points differ from the US recode)
## `yob` is coerced to numeric before 11 is added.
## NOTE(review): presumably `yob` is a coded survey value stored as text in
## the CN export — confirm against the codebook.
## `age6` bins: 1 = 18-19, 2 = 20-29, 3 = 30-39, 4 = 40-49, 5 = 50-59,
## 6 = 60 and above. Any age outside these ranges (or missing) becomes NA.
df_CN[["age"]] <- as.numeric(df_CN[["yob"]]) + 11
df_CN <- mutate(
  df_CN,
  age6 = case_when(
    between(age, 18, 19) ~ 1,
    between(age, 20, 29) ~ 2,
    between(age, 30, 39) ~ 3,
    between(age, 40, 49) ~ 4,
    between(age, 50, 59) ~ 5,
    age >= 60            ~ 6
  )
)

## Race (1 = Han)
## Indicator that is 1 when `race` equals 1 and 0 otherwise; a missing `race`
## stays NA. The frequency table is printed as a quick check.
df_CN$han <- as.numeric(df_CN$race == 1)
table(df_CN[["han"]])

## Income
## Store `income2` as a numeric variable and print its median (ignoring
## missing values) as a quick check.
## `TRUE` is spelled out because `T` is an ordinary variable that can be
## reassigned.
df_CN$inc <- as.numeric(df_CN$income2)
median(df_CN$inc, na.rm = TRUE)

## Region
## Collapse the `region` codes into six groups (1-6).
## Codes that appear in none of the lists below (e.g. 1 and 22) and missing
## values become NA.
## NOTE(review): what each group represents is not shown here — confirm
## against the codebook.
df_CN <- mutate(
  df_CN,
  region6 = case_when(
    region %in% c(3, 11, 17, 28, 30)           ~ 1,
    region %in% c(12, 20, 21)                  ~ 2,
    region %in% c(2, 5, 18, 19, 26, 27, 34)    ~ 3,
    region %in% c(7, 8, 10, 13, 14, 15, 16)    ~ 4,
    region %in% c(4, 9, 29, 31, 33)            ~ 5,
    region %in% c(6, 23, 24, 25, 32)           ~ 6
  )
)

### Table S1: demographic distributions of the American sample ----
## Each statement prints the percentage share of every observed category
## (missing values are excluded by table()'s default).

## Gender
100 * prop.table(table(gender = df_US$gender))

## Age
100 * prop.table(table(age6 = df_US$age6))

## Ethnicity
100 * prop.table(table(racial = df_US$racial))

## Hispanic
100 * prop.table(table(hispanic = df_US$hispanic))

## Income
100 * prop.table(table(inc6 = df_US$inc6))

### Table S2: demographic distributions of the Chinese sample ----
## Each statement prints the percentage share of every observed category
## (missing values are excluded by table()'s default).

## Gender
100 * prop.table(table(gender = df_CN$gender))

## Region
100 * prop.table(table(region6 = df_CN$region6))

## Age
100 * prop.table(table(age6 = df_CN$age6))

## Ethnicity
100 * prop.table(table(han = df_CN$han))

## Household income
## Mean and median of the numeric income variable, ignoring missing values.
## `TRUE` is spelled out because `T` is an ordinary variable that can be
## reassigned.
mean(df_CN$inc, na.rm = TRUE)
median(df_CN$inc, na.rm = TRUE)
